Merge pull request #650 from chriseidhof/website-agent-merge-events

Make the website agent support merge events

Andrew Cantino 10 years ago
parent
commit
7472fd88e1
2 changed files with 66 additions and 50 deletions
  1. 56 50
      app/models/agents/website_agent.rb
  2. 10 0
      spec/models/agents/website_agent_spec.rb

+ 56 - 50
app/models/agents/website_agent.rb

@@ -74,7 +74,7 @@ module Agents
74 74
 
75 75
       The `headers` field is optional.  When present, it should be a hash of headers to send with the request.
76 76
 
77
-      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload.
77
+      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. If you specify `merge` as the mode, it will retain the old payload and update it with the new values.
78 78
 
79 79
       In Liquid templating, the following variable is available:
80 80
 
@@ -120,7 +120,7 @@ module Agents
120 120
 
121 121
       # Check for optional fields
122 122
       if options['mode'].present?
123
-        errors.add(:base, "mode must be set to on_change or all") unless %w[on_change all].include?(options['mode'])
123
+        errors.add(:base, "mode must be set to on_change, all or merge") unless %w[on_change all merge].include?(options['mode'])
124 124
       end
125 125
 
126 126
       if options['expected_update_period_in_days'].present?
@@ -148,66 +148,70 @@ module Agents
148 148
     end
149 149
 
150 150
     def check
151
-      check_url interpolated['url']
151
+      check_urls(interpolated['url'])
152 152
     end
153 153
 
154
-    def check_url(in_url)
154
+    def check_urls(in_url)
155 155
       return unless in_url.present?
156 156
 
157 157
       Array(in_url).each do |url|
158
-        log "Fetching #{url}"
159
-        response = faraday.get(url)
160
-        raise "Failed: #{response.inspect}" unless response.success?
161
-
162
-        interpolation_context.stack {
163
-          interpolation_context['_response_'] = ResponseDrop.new(response)
164
-          body = response.body
165
-          if (encoding = interpolated['force_encoding']).present?
166
-            body = body.encode(Encoding::UTF_8, encoding)
167
-          end
168
-          doc = parse(body)
158
+        check_url(url)
159
+      end
160
+    end
169 161
 
170
-          if extract_full_json?
171
-            if store_payload!(previous_payloads(1), doc)
172
-              log "Storing new result for '#{name}': #{doc.inspect}"
173
-              create_event :payload => doc
174
-            end
175
-            next
176
-          end
162
+    def check_url(url, payload = {})
163
+      log "Fetching #{url}"
164
+      response = faraday.get(url)
165
+      raise "Failed: #{response.inspect}" unless response.success?
177 166
 
178
-          output =
179
-            case extraction_type
180
-            when 'json'
181
-              extract_json(doc)
182
-            when 'text'
183
-              extract_text(doc)
184
-            else
185
-              extract_xml(doc)
186
-            end
167
+      interpolation_context.stack {
168
+        interpolation_context['_response_'] = ResponseDrop.new(response)
169
+        body = response.body
170
+        if (encoding = interpolated['force_encoding']).present?
171
+          body = body.encode(Encoding::UTF_8, encoding)
172
+        end
173
+        doc = parse(body)
187 174
 
188
-          num_unique_lengths = interpolated['extract'].keys.map { |name| output[name].length }.uniq
175
+        if extract_full_json?
176
+          if store_payload!(previous_payloads(1), doc)
177
+            log "Storing new result for '#{name}': #{doc.inspect}"
178
+            create_event payload: payload.merge(doc)
179
+          end
180
+          return
181
+        end
189 182
 
190
-          if num_unique_lengths.length != 1
191
-            raise "Got an uneven number of matches for #{interpolated['name']}: #{interpolated['extract'].inspect}"
183
+        output =
184
+          case extraction_type
185
+          when 'json'
186
+            extract_json(doc)
187
+          when 'text'
188
+            extract_text(doc)
189
+          else
190
+            extract_xml(doc)
192 191
           end
193 192
 
194
-          old_events = previous_payloads num_unique_lengths.first
195
-          num_unique_lengths.first.times do |index|
196
-            result = {}
197
-            interpolated['extract'].keys.each do |name|
198
-              result[name] = output[name][index]
199
-              if name.to_s == 'url'
200
-                result[name] = (response.env[:url] + result[name]).to_s
201
-              end
202
-            end
193
+        num_unique_lengths = interpolated['extract'].keys.map { |name| output[name].length }.uniq
194
+
195
+        if num_unique_lengths.length != 1
196
+          raise "Got an uneven number of matches for #{interpolated['name']}: #{interpolated['extract'].inspect}"
197
+        end
203 198
 
204
-            if store_payload!(old_events, result)
205
-              log "Storing new parsed result for '#{name}': #{result.inspect}"
206
-              create_event :payload => result
199
+        old_events = previous_payloads num_unique_lengths.first
200
+        num_unique_lengths.first.times do |index|
201
+          result = {}
202
+          interpolated['extract'].keys.each do |name|
203
+            result[name] = output[name][index]
204
+            if name.to_s == 'url'
205
+              result[name] = (response.env[:url] + result[name]).to_s
207 206
             end
208 207
           end
209
-        }
210
-      end
208
+
209
+          if store_payload!(old_events, result)
210
+            log "Storing new parsed result for '#{name}': #{result.inspect}"
211
+            create_event payload: payload.merge(result)
212
+          end
213
+        end
214
+      }
211 215
     rescue => e
212 216
       error "Error when fetching url: #{e.message}\n#{e.backtrace.join("\n")}"
213 217
     end
@@ -216,7 +220,9 @@ module Agents
216 220
       incoming_events.each do |event|
217 221
         interpolate_with(event) do
218 222
           url_to_scrape = event.payload['url']
219
-          check_url(url_to_scrape) if url_to_scrape =~ /^https?:\/\//i
223
+          next unless url_to_scrape =~ /^https?:\/\//i
224
+          check_url(url_to_scrape,
225
+                    interpolated['mode'].to_s == "merge" ? event.payload : {})
220 226
         end
221 227
       end
222 228
     end
@@ -238,7 +244,7 @@ module Agents
238 244
           end
239 245
         end
240 246
         true
241
-      when 'all', ''
247
+      when 'all', 'merge', ''
242 248
         true
243 249
       else
244 250
         raise "Illegal options[mode]: #{interpolated['mode']}"

+ 10 - 0
spec/models/agents/website_agent_spec.rb

@@ -521,6 +521,16 @@ fire: hot
521 521
 
522 522
         expect(Event.last.payload['response_info']).to eq('The reponse from XKCD was 200 OK.')
523 523
       end
524
+
525
+      it "should support merging of events" do
526
+        expect {
527
+          @checker.options = @valid_options
528
+          @checker.options[:mode] = "merge"
529
+          @checker.receive([@event])
530
+        }.to change { Event.count }.by(1)
531
+        last_payload = Event.last.payload
532
+        expect(last_payload['link']).to eq('Random')
533
+      end
524 534
     end
525 535
   end
526 536